
# Setup ------------------------------------------------------------------------
  # Options
  options(stringsAsFactors = F)
  options(bitmapType = "cairo")
  # Packages
  library(pacman)
  p_load(foreign, data.table, stringr, lubridate,
    rgeos, rgdal, broom, maptools, rmapshaper, geosphere,
    ggplot2, ggthemes, viridis, Cairo, extrafont,
    magrittr)
  # Directories
  dir_gas     <- "/Users/edwardarubin/Dropbox/Research/MyProjects/NaturalGas/"
  dir_spatial <- paste0(dir_gas, "DataSpatial/")
  dir_csv     <- paste0(dir_gas, "DataCsv/")
  dir_rds     <- paste0(dir_gas, "DataR/")
  dir_geozips <- paste0(dir_csv, "ZipCodes/GeoCodedCalifornia/")
  dir_output  <- paste0(dir_gas, "Output/")
  dir_figures <- paste0(dir_gas, "Figures/")

# My ggplot2 theme -------------------------------------------------------------
  # Define colors
  dark_grey <- "#7D7D7D"
  light_grey <- "#C7C7C7"
  # My ggplot themes
  theme_beamer <- theme_bw() + theme(
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    legend.position = 'bottom',
    legend.key = element_blank(),
    panel.grid = element_blank(),
    text = element_text(family = "Roboto", color = "black",
      size = 16),
    title = element_text(size = 18),
    legend.text = element_text(size = 18),
    legend.key.size = unit(2.5, "cm"))
  theme_paper <- theme_bw() + theme(
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    legend.position = 'bottom',
    legend.key = element_blank(),
    panel.grid = element_blank(),
    text = element_text(family = "Bitstream Charter", color = "black",
      size = 11),
    axis.text = element_text(color = "black"),
    title = element_text(size = 12),
    legend.text = element_text(size = 11),
    legend.key.size = unit(1.5, "cm"))

# Save functions ---------------------------------------------------------------
  # My own save functions
  save_beamer <- function(gg_tmp, name, width = 16 * 0.7, height = 10 * 0.7,
    themes = NULL) {
      ggsave(filename = paste0(name, ".pdf"),
        path = dir_figures,
        plot = gg_tmp + theme_beamer + themes,
        width = width,
        height = height,
        device = cairo_pdf)
      embed_fonts(paste0(dir_figures, name, ".pdf"))
  }
  save_paper <- function(gg_tmp, name, width = 6.4, height = 4,
    themes = NULL) {
      ggsave(filename = paste0(name, ".pdf"),
        path = dir_figures,
        plot = gg_tmp + theme_paper + themes,
        width = width,
        height = height,
        device = cairo_pdf)
      embed_fonts(paste0(dir_figures, name, ".pdf"))
  }

# Load and merge datasets ------------------------------------------------------
  # Load the common cities CSV
  common_cities <- fread(paste0(dir_csv, "commonCityZips.csv"))
  # Convert zips to character
  common_cities[, zip := as.character(zip)]
  # Load the common 5-digit zip codes (between PG&E and SoCalGas)
  common_zip5 <- fread(paste0(dir_csv, "commonZipsCountsPgeSocal.csv"))
  # Load common 9-digit zip code
  common_zip9 <- readRDS(paste0(dir_rds, "utilityZip9Map.rds"))
  # Convert the plus 4 to integer and change name
  common_zip9[, zip_plus4 := as.integer(zip_plus4)]
  setnames(common_zip9, old = "zip_plus4", new = "plus4")
  # Correct the 9-digit zip code for plus4's that only have 3 integer digits
  common_zip9[, zip9 := paste0(zip, str_pad(plus4, 4, "left", 0))]
# NOTE: Some observations are missing their full 9-digit zip code
  # Convert 9-digit zip code to integer (zip9 is NA if plus4 is NA)
  common_zip9[, zip9 := as.integer(zip9)]
  # Load CSV of utility presence by 5-digit zip code
  zip_utility <- fread(paste0(dir_csv, "utilityZipCodeMap.csv"))
  # Load the zip code shapefile
  zip_shp <- readOGR(
    dsn = paste0(dir_spatial, "CaliforniaZipCodes"),
    layer = "CaliforniaZipCodes")
  # Take subset of shapefile for zip codes that are in the common cities file
  zip_shp %<>% subset(ZIP_CODE %in% common_cities$zip)
  # Convert to data.table
  zip_dt <- tidy(zip_shp, region = "ZIP_CODE") %>% data.table()
  # Load the geocoded zip9s
  geo_dt <- lapply(X = 1:5, FUN = function(i) {
    dir_geozips %>% paste0("Set", i, "/set", i, ".dbf") %>%
      read.dbf() %>% data.table()
    }) %>% rbindlist()
  # Grab (and rename) desired variables for the zip codes in common_zip9
  geo_zip <- geo_dt[zip5 %in% common_zip9$zip, .(
    zip9       = zip9,
    lon        = X,
    lat        = Y,
    score      = Score,
    match_type = Match_type,
    loc_name   = Loc_name,
    city       = City
    )]
  # Merge geo_zip onto common_zip9
  zip9_dt <- merge(common_zip9, geo_zip, by = "zip9", all.x = T, all.y = F)

# Distances --------------------------------------------------------------------
  # Split the zip codes in zip9_dt by utility
  pge_dt <- zip9_dt[pge == T &
    !is.na(zip9), .(zip9, lon, lat, utility = "pge")]
  socal_dt <- zip9_dt[socal == T &
    !is.na(zip9), .(zip9, lon, lat, utility = "socal")]
  # Find the Haversine distance between the utilities' sets
  dist_matrix <- distm(
    x = as.matrix(pge_dt[, .(lon, lat)]),
    y = as.matrix(socal_dt[, .(lon, lat)]),
    fun = distHaversine)
  # Find each row's min
  pge_mins <- apply(X = dist_matrix, FUN = min, MARGIN = 1)
  socal_mins <- apply(X = dist_matrix, FUN = min, MARGIN = 2)
  # Attach these minima to their respective datasets
  pge_dt[, dist_other := pge_mins]
  socal_dt[, dist_other := socal_mins]
  # Joint dataset of distances
  dist_dt <- rbindlist(list(pge_dt, socal_dt))
  # Convert to km
  dist_dt[, dist_other := dist_other / 1e3]

# Plot 'common' 9-digit zip codes; color by utility ----------------------------
  ggplot(data = dist_dt, aes(lon, lat)) +
    geom_path(data = zip_dt, aes(long, lat, group = group),
      color = "grey80", size = 0.35) +
    geom_point(aes(color = utility), shape = 20, size = 0.5, alpha = 0.8) +
    coord_quickmap() +
    theme_beamer +
    theme(
      legend.title = element_text(face = "bold"),
      axis.text = element_blank(),
      legend.key.size = unit(1, "cm")) +
    guides(colour = guide_legend(override.aes = list(size = 18))) +
    xlab("") + ylab("") +
    scale_color_manual("Utility:",
      labels = c("PG&E", "SoCalGas"),
      values = c("#ffc107", "#310769"))

# Plot 'common' 9-digit zip codes; only PG&E -----------------------------------
  ggplot(data = dist_dt[utility == "pge"], aes(lon, lat)) +
    geom_path(data = zip_dt, aes(long, lat, group = group),
      color = "grey80", size = 0.35) +
    geom_point(aes(color = utility), shape = 20, size = 0.5, alpha = 0.8) +
    coord_quickmap() +
    theme_beamer +
    theme(
      legend.title = element_text(face = "bold"),
      axis.text = element_blank(),
      legend.key.size = unit(1, "cm")) +
    guides(colour = guide_legend(override.aes = list(size = 18))) +
    xlab("") + ylab("") +
    scale_color_manual("Utility:",
      labels = c("PG&E"),
      values = c("#FFC107"))

# Plot 'common' 9-digit zip codes; only SoCalGas -------------------------------
  ggplot(data = dist_dt[utility == "socal"], aes(lon, lat)) +
    geom_path(data = zip_dt, aes(long, lat, group = group),
      color = "grey80", size = 0.35) +
    geom_point(aes(color = utility), shape = 20, size = 0.5, alpha = 0.8) +
    coord_quickmap() +
    theme_beamer +
    theme(
      legend.title = element_text(face = "bold"),
      axis.text = element_blank(),
      legend.key.size = unit(1, "cm")) +
    guides(colour = guide_legend(override.aes = list(size = 18))) +
    xlab("") + ylab("") +
    scale_color_manual("Utility:",
      labels = c("SoCalGas"),
      values = c("#310769"))

# Plot 'common' 9-digit zip codes; color by distance to other utility ----------
  ggplot(data = dist_dt, aes(lon, lat)) +
    geom_path(data = zip_dt, aes(long, lat, group = group),
      color = "grey80", size = 0.35) +
    geom_point(aes(color = dist_other), shape = 20, size = 0.5, alpha = 0.8) +
    theme_beamer +
    theme(axis.text = element_blank()) +
    coord_quickmap() +
    xlab("") + ylab("") +
    scale_color_viridis("Distance (km) to other utility",
      option = "B", trans = "sqrt")
      # option = "B", trans = "log1p")
      # option = "B", trans = "identity")

# Plot 'common' 9-digit zip codes; color by distance to other utility ----------
  # Subset: Bakerfield area
  ggplot(data = dist_dt, aes(lon, lat)) +
    geom_path(data = zip_dt, aes(long, lat, group = group),
      color = "grey80", size = 0.35) +
    geom_point(aes(color = dist_other), shape = 20, size = 0.5, alpha = 0.8) +
    theme_beamer +
    theme(axis.text = element_blank()) +
    coord_quickmap() +
    xlim(-119.8, -118.16) +
    ylim(34.9, 35.75) +
    xlab("") + ylab("") +
    scale_color_viridis("Distance (km) to other utility",
      option = "B", trans = "sqrt")
      # option = "B", trans = "log1p")
      # option = "B", trans = "identity")

# Plot 'common' 9-digit zip codes; color by distance to other utility ----------
  # Subset: Fresno area
  ggplot(data = dist_dt, aes(lon, lat)) +
    geom_path(data = zip_dt, aes(long, lat, group = group),
      color = "grey80", size = 0.35) +
    geom_point(aes(color = dist_other), shape = 20, size = 0.5, alpha = 0.8) +
    theme_beamer +
    theme(axis.text = element_blank()) +
    coord_quickmap() +
    xlim(-120.1, -119.5) +
    ylim(36.45, 37) +
    xlab("") + ylab("") +
    scale_color_viridis("Distance (km) to other utility",
      option = "B", trans = "sqrt")
      # option = "B", trans = "log1p")
      # option = "B", trans = "identity")

# Plot 'common' 9-digit zip codes; color by distance to other utility ----------
  # Subset: Paso Robles area
  ggplot(data = dist_dt, aes(lon, lat)) +
    geom_path(data = zip_dt, aes(long, lat, group = group),
      color = "grey80", size = 0.35) +
    geom_point(aes(color = dist_other), shape = 20, size = 0.5, alpha = 0.8) +
    theme_beamer +
    theme(axis.text = element_blank()) +
    coord_quickmap() +
    xlim(-121.1, -120.4) +
    ylim(35.45, 35.8) +
    xlab("") + ylab("") +
    scale_color_viridis("Distance (km) to other utility",
      option = "B", trans = "sqrt")
      # option = "B", trans = "log1p")
      # option = "B", trans = "identity")
